# Importing Libraries
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, RandomForestRegressor, GradientBoostingRegressor
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.tree import DecisionTreeClassifier, plot_tree, DecisionTreeRegressor
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier, RandomForestRegressor, GradientBoostingRegressor
from sklearn.metrics import accuracy_score, mean_squared_error
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay, precision_score, recall_score
from sklearn.metrics import classification_report, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split, GridSearchCV, cross_val_score, StratifiedKFold, KFold
# Load the raw Spotify features dataset from a local CSV
# (path is on local disk, despite the original "Google Drive" note)
data_path = '/Users/sowjanyapadala/Desktop/Coursework/Q3/DATA_5322_Statistical_Machine_Learning2/Final_Project/Spotify_project/Dataset/SpotifyFeatures.csv'
spotify_df = pd.read_csv(data_path)
# Remove duplicated tracks: keep the first row for each track_id
spotify_df= spotify_df.drop_duplicates(subset=['track_id'])
# Sanity check — after de-duplication no track_id should occur more than once
count = spotify_df['track_id'].value_counts()
num_duplicated_ids = (count > 1).sum()
print("Number of unique tracks that are duplicated after cleaning:", num_duplicated_ids)
Number of unique tracks that are duplicated after cleaning: 0
# Normalize the genre strings: strip leading/trailing whitespace (the original
# comment promised this but the code never did it) and replace typographic
# apostrophes (’) with straight ones so identical genres compare equal
# (e.g. "Children’s Music" vs "Children's Music").
spotify_df['genre'] = spotify_df['genre'].str.strip()
spotify_df['genre'] = spotify_df['genre'].str.replace('’', "'", regex=False)
# Inspect the distinct genre labels after cleaning
spotify_df['genre'].unique()
array(['Movie', 'R&B', 'A Capella', 'Alternative', 'Country', 'Dance',
'Electronic', 'Anime', 'Folk', 'Blues', 'Opera', 'Hip-Hop',
"Children's Music", 'Rap', 'Indie', 'Classical', 'Pop', 'Reggae',
'Reggaeton', 'Jazz', 'Rock', 'Ska', 'Comedy', 'Soul', 'Soundtrack',
'World'], dtype=object)
# Collapse the 26 raw genres into 9 broader, more balanced groups.
# Declared group -> members, then inverted into the flat lookup the
# .map() call needs.
_genre_groups = {
    'Pop/Rock': ['Pop', 'Rock', 'Indie', 'Alternative', 'Soul', 'A Capella'],
    'Hip-Hop/Rap/R&B': ['Hip-Hop', 'Rap', 'R&B'],
    'Dance/Electronic': ['Dance', 'Electronic', 'Reggaeton', 'Reggae', 'Ska'],
    'Jazz/Blues': ['Jazz', 'Blues'],
    'Classical/Opera': ['Classical', 'Opera'],
    'Country/Folk': ['Country', 'Folk'],
    'World/Soundtrack': ['World', 'Soundtrack'],
    'Movie/Comedy': ['Movie', 'Comedy'],
    'Children/Anime': ["Children's Music", 'Anime'],
}
# Invert: raw genre -> grouped label
genre_mapping = {
    genre: group
    for group, members in _genre_groups.items()
    for genre in members
}
spotify_df['genre_grouped'] = spotify_df['genre'].map(genre_mapping)
print(spotify_df['genre_grouped'].value_counts())
genre_grouped Dance/Electronic 42384 Pop/Rock 21606 Movie/Comedy 17476 Classical/Opera 16991 Jazz/Blues 16535 World/Soundtrack 16453 Children/Anime 15676 Country/Folk 15431 Hip-Hop/Rap/R&B 14222 Name: count, dtype: int64
# Log-transform (log1p) the right-skewed features so their distributions
# are closer to symmetric before standardization
for col in ['duration_ms', 'instrumentalness', 'speechiness']:
    spotify_df[col] = np.log1p(spotify_df[col])
# Integer-encode the categorical columns in place
for col in ['key', 'mode', 'time_signature']:
    le = LabelEncoder()
    spotify_df[col] = le.fit_transform(spotify_df[col])
# Numeric columns to standardize (zero mean, unit variance)
numeric_features = [
    'popularity', 'acousticness', 'danceability', 'duration_ms', 'energy',
    'instrumentalness', 'liveness', 'loudness', 'speechiness', 'tempo', 'valence'
]
# StandardScaler is already imported at the top of the file
scaler = StandardScaler()
spotify_df[numeric_features] = scaler.fit_transform(spotify_df[numeric_features])
# Peek at the first few standardized rows
spotify_df[numeric_features].head(5)
| popularity | acousticness | danceability | duration_ms | energy | instrumentalness | liveness | loudness | speechiness | tempo | valence | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -2.085747 | 0.564740 | -0.798733 | -1.752675 | 1.279646 | -0.547439 | 0.575611 | 1.299283 | -0.384671 | 1.588677 | 1.353170 |
| 1 | -2.028246 | -0.431708 | 0.257016 | -1.015026 | 0.652467 | -0.547439 | -0.348444 | 0.715907 | -0.165891 | 1.813226 | 1.360637 |
| 2 | -1.913244 | 1.495668 | 0.640447 | -0.526019 | -1.544471 | -0.547439 | -0.575904 | -0.585001 | -0.491152 | -0.565544 | -0.312134 |
| 3 | -2.085747 | 0.815900 | -1.581352 | -0.778149 | -0.837536 | -0.547439 | -0.597228 | -0.319034 | -0.469460 | 1.741558 | -0.838608 |
| 4 | -1.855743 | 1.490208 | -1.103376 | -2.173113 | -1.203692 | -0.051906 | -0.106768 | -1.721889 | -0.429543 | 0.746124 | -0.229989 |
# Visualize pairwise correlations among the standardized numeric features
# (seaborn/matplotlib are already imported at the top of the file)
correlation_matrix = spotify_df[numeric_features].corr()
plt.figure(figsize=(12, 8))
sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', linewidths=0.5)
plt.title('Correlation Heatmap of Numeric Features')
plt.show()
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, BatchNormalization
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping
import time
# Feature matrix: the 11 standardized numeric columns
X = spotify_df[numeric_features].values
# Integer-encode the grouped genre labels, then one-hot encode them
# for use with the categorical-crossentropy loss
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(spotify_df['genre_grouped'])
y_categorical = to_categorical(y_encoded)
# Print the index -> genre-group mapping for reference
for i, label in enumerate(label_encoder.classes_):
    print(f"{i}: {label}")
0: Children/Anime 1: Classical/Opera 2: Country/Folk 3: Dance/Electronic 4: Hip-Hop/Rap/R&B 5: Jazz/Blues 6: Movie/Comedy 7: Pop/Rock 8: World/Soundtrack
# 80/20 train/test split, stratified on the integer labels so every genre
# group keeps its class proportion in both splits
X_train, X_test, y_train, y_test = train_test_split(
    X, y_categorical, test_size=0.2, random_state=42, stratify=y_encoded
)
# Get the number of features and classes
input_shape = X_train.shape[1]  # number of input features (11 numeric columns)
num_classes = y_train.shape[1]  # number of one-hot target classes (9 genre groups)
# Build model 1: three hidden layers (256 -> 128 -> 64), each followed by
# batch normalization and 30% dropout, with a softmax output over the genre
# groups. An explicit Input layer avoids the Keras UserWarning raised when
# `input_shape=` is passed directly to the first Dense layer (see the
# warning emitted by the original code).
from tensorflow.keras.layers import Input

model1_nn = Sequential()
model1_nn.add(Input(shape=(input_shape,)))
for units in (256, 128, 64):
    model1_nn.add(Dense(units, activation='relu'))
    model1_nn.add(BatchNormalization())
    model1_nn.add(Dropout(0.3))
model1_nn.add(Dense(num_classes, activation='softmax'))
# Compile with Adam; track accuracy, precision and recall during training
model1_nn.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'Precision', 'Recall']
)
/opt/anaconda3/lib/python3.12/site-packages/keras/src/layers/core/dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(activity_regularizer=activity_regularizer, **kwargs)
# Early stopping: halt when validation accuracy has not improved for
# 10 consecutive epochs, restoring the best weights seen so far.
# NOTE(review): validation_data below is the held-out TEST set, so early
# stopping is tuned on the same data used for the final evaluation — a
# separate validation split would give an unbiased test estimate.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    patience=10,
    restore_best_weights=True
)
start_time = time.time()  # Record the start time
history1 = model1_nn.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping]
)
end_time = time.time()  # Record the end time
total_time = end_time - start_time
print(f"Training completed in {total_time:.2f} seconds.")
Epoch 1/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 24s 5ms/step - Precision: 0.6331 - Recall: 0.3138 - accuracy: 0.4883 - loss: 1.4099 - val_Precision: 0.7422 - val_Recall: 0.4143 - val_accuracy: 0.5962 - val_loss: 1.0854 Epoch 2/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7056 - Recall: 0.3804 - accuracy: 0.5619 - loss: 1.1847 - val_Precision: 0.7359 - val_Recall: 0.4472 - val_accuracy: 0.6070 - val_loss: 1.0554 Epoch 3/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7125 - Recall: 0.4020 - accuracy: 0.5771 - loss: 1.1487 - val_Precision: 0.7302 - val_Recall: 0.4505 - val_accuracy: 0.6078 - val_loss: 1.0506 Epoch 4/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - Precision: 0.7192 - Recall: 0.4118 - accuracy: 0.5814 - loss: 1.1320 - val_Precision: 0.7451 - val_Recall: 0.4522 - val_accuracy: 0.6117 - val_loss: 1.0384 Epoch 5/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 23s 5ms/step - Precision: 0.7225 - Recall: 0.4176 - accuracy: 0.5834 - loss: 1.1238 - val_Precision: 0.7498 - val_Recall: 0.4492 - val_accuracy: 0.6136 - val_loss: 1.0322 Epoch 6/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 43s 5ms/step - Precision: 0.7226 - Recall: 0.4198 - accuracy: 0.5840 - loss: 1.1196 - val_Precision: 0.7494 - val_Recall: 0.4572 - val_accuracy: 0.6165 - val_loss: 1.0303 Epoch 7/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 38s 5ms/step - Precision: 0.7234 - Recall: 0.4221 - accuracy: 0.5880 - loss: 1.1144 - val_Precision: 0.7473 - val_Recall: 0.4606 - val_accuracy: 0.6149 - val_loss: 1.0244 Epoch 8/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - Precision: 0.7264 - Recall: 0.4241 - accuracy: 0.5892 - loss: 1.1089 - val_Precision: 0.7426 - val_Recall: 0.4723 - val_accuracy: 0.6190 - val_loss: 1.0174 Epoch 9/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 23s 5ms/step - Precision: 0.7240 - Recall: 0.4269 - accuracy: 0.5897 - loss: 1.1083 - val_Precision: 0.7530 - val_Recall: 0.4627 - val_accuracy: 0.6197 - val_loss: 1.0163 Epoch 10/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - Precision: 
0.7273 - Recall: 0.4299 - accuracy: 0.5942 - loss: 1.1024 - val_Precision: 0.7438 - val_Recall: 0.4690 - val_accuracy: 0.6196 - val_loss: 1.0169 Epoch 11/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 5ms/step - Precision: 0.7259 - Recall: 0.4296 - accuracy: 0.5940 - loss: 1.1013 - val_Precision: 0.7603 - val_Recall: 0.4545 - val_accuracy: 0.6226 - val_loss: 1.0119 Epoch 12/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7290 - Recall: 0.4321 - accuracy: 0.5941 - loss: 1.0953 - val_Precision: 0.7568 - val_Recall: 0.4590 - val_accuracy: 0.6215 - val_loss: 1.0138 Epoch 13/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 43s 5ms/step - Precision: 0.7313 - Recall: 0.4341 - accuracy: 0.5969 - loss: 1.0949 - val_Precision: 0.7527 - val_Recall: 0.4664 - val_accuracy: 0.6200 - val_loss: 1.0105 Epoch 14/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - Precision: 0.7285 - Recall: 0.4340 - accuracy: 0.5966 - loss: 1.0926 - val_Precision: 0.7602 - val_Recall: 0.4586 - val_accuracy: 0.6229 - val_loss: 1.0067 Epoch 15/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7296 - Recall: 0.4355 - accuracy: 0.5961 - loss: 1.0886 - val_Precision: 0.7468 - val_Recall: 0.4794 - val_accuracy: 0.6227 - val_loss: 1.0025 Epoch 16/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7285 - Recall: 0.4357 - accuracy: 0.5954 - loss: 1.0894 - val_Precision: 0.7572 - val_Recall: 0.4703 - val_accuracy: 0.6257 - val_loss: 1.0047 Epoch 17/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - Precision: 0.7297 - Recall: 0.4353 - accuracy: 0.5965 - loss: 1.0886 - val_Precision: 0.7542 - val_Recall: 0.4689 - val_accuracy: 0.6225 - val_loss: 1.0053 Epoch 18/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7299 - Recall: 0.4353 - accuracy: 0.5959 - loss: 1.0881 - val_Precision: 0.7579 - val_Recall: 0.4643 - val_accuracy: 0.6246 - val_loss: 1.0015 Epoch 19/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 5ms/step - Precision: 0.7310 - Recall: 0.4378 - accuracy: 0.5987 - loss: 1.0829 - 
val_Precision: 0.7601 - val_Recall: 0.4681 - val_accuracy: 0.6251 - val_loss: 1.0040 Epoch 20/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 43s 5ms/step - Precision: 0.7332 - Recall: 0.4420 - accuracy: 0.6010 - loss: 1.0768 - val_Precision: 0.7544 - val_Recall: 0.4662 - val_accuracy: 0.6239 - val_loss: 1.0017 Epoch 21/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 37s 4ms/step - Precision: 0.7327 - Recall: 0.4382 - accuracy: 0.5991 - loss: 1.0828 - val_Precision: 0.7435 - val_Recall: 0.4897 - val_accuracy: 0.6257 - val_loss: 0.9985 Epoch 22/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7327 - Recall: 0.4413 - accuracy: 0.6002 - loss: 1.0806 - val_Precision: 0.7549 - val_Recall: 0.4727 - val_accuracy: 0.6267 - val_loss: 0.9998 Epoch 23/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 40s 5ms/step - Precision: 0.7330 - Recall: 0.4411 - accuracy: 0.6012 - loss: 1.0796 - val_Precision: 0.7477 - val_Recall: 0.4770 - val_accuracy: 0.6251 - val_loss: 1.0023 Epoch 24/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7310 - Recall: 0.4428 - accuracy: 0.6019 - loss: 1.0766 - val_Precision: 0.7505 - val_Recall: 0.4798 - val_accuracy: 0.6258 - val_loss: 0.9989 Epoch 25/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7330 - Recall: 0.4435 - accuracy: 0.6023 - loss: 1.0748 - val_Precision: 0.7506 - val_Recall: 0.4809 - val_accuracy: 0.6261 - val_loss: 0.9963 Epoch 26/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 40s 4ms/step - Precision: 0.7351 - Recall: 0.4450 - accuracy: 0.6013 - loss: 1.0767 - val_Precision: 0.7619 - val_Recall: 0.4640 - val_accuracy: 0.6259 - val_loss: 0.9993 Epoch 27/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7322 - Recall: 0.4403 - accuracy: 0.6018 - loss: 1.0786 - val_Precision: 0.7529 - val_Recall: 0.4816 - val_accuracy: 0.6272 - val_loss: 0.9965 Epoch 28/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7343 - Recall: 0.4472 - accuracy: 0.6041 - loss: 1.0695 - val_Precision: 0.7568 - val_Recall: 0.4760 - val_accuracy: 
0.6279 - val_loss: 0.9920 Epoch 29/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7342 - Recall: 0.4426 - accuracy: 0.6023 - loss: 1.0747 - val_Precision: 0.7558 - val_Recall: 0.4741 - val_accuracy: 0.6275 - val_loss: 0.9961 Epoch 30/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 5ms/step - Precision: 0.7361 - Recall: 0.4439 - accuracy: 0.6036 - loss: 1.0709 - val_Precision: 0.7550 - val_Recall: 0.4772 - val_accuracy: 0.6279 - val_loss: 0.9951 Epoch 31/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 39s 5ms/step - Precision: 0.7334 - Recall: 0.4436 - accuracy: 0.6026 - loss: 1.0757 - val_Precision: 0.7669 - val_Recall: 0.4666 - val_accuracy: 0.6303 - val_loss: 0.9926 Epoch 32/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 5ms/step - Precision: 0.7353 - Recall: 0.4450 - accuracy: 0.6039 - loss: 1.0681 - val_Precision: 0.7587 - val_Recall: 0.4755 - val_accuracy: 0.6290 - val_loss: 0.9920 Epoch 33/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 5ms/step - Precision: 0.7343 - Recall: 0.4410 - accuracy: 0.5999 - loss: 1.0735 - val_Precision: 0.7532 - val_Recall: 0.4839 - val_accuracy: 0.6298 - val_loss: 0.9906 Epoch 34/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7328 - Recall: 0.4442 - accuracy: 0.6026 - loss: 1.0748 - val_Precision: 0.7605 - val_Recall: 0.4696 - val_accuracy: 0.6288 - val_loss: 0.9924 Epoch 35/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 5ms/step - Precision: 0.7340 - Recall: 0.4429 - accuracy: 0.6030 - loss: 1.0733 - val_Precision: 0.7560 - val_Recall: 0.4811 - val_accuracy: 0.6316 - val_loss: 0.9906 Epoch 36/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - Precision: 0.7356 - Recall: 0.4495 - accuracy: 0.6058 - loss: 1.0666 - val_Precision: 0.7615 - val_Recall: 0.4743 - val_accuracy: 0.6285 - val_loss: 0.9893 Epoch 37/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - Precision: 0.7363 - Recall: 0.4456 - accuracy: 0.6040 - loss: 1.0696 - val_Precision: 0.7504 - val_Recall: 0.4838 - val_accuracy: 0.6257 - val_loss: 0.9944 Epoch 38/100 4420/4420 
━━━━━━━━━━━━━━━━━━━━ 22s 5ms/step - Precision: 0.7359 - Recall: 0.4485 - accuracy: 0.6030 - loss: 1.0690 - val_Precision: 0.7640 - val_Recall: 0.4712 - val_accuracy: 0.6298 - val_loss: 0.9888 Epoch 39/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - Precision: 0.7343 - Recall: 0.4403 - accuracy: 0.6014 - loss: 1.0758 - val_Precision: 0.7618 - val_Recall: 0.4736 - val_accuracy: 0.6287 - val_loss: 0.9893 Epoch 40/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 5ms/step - Precision: 0.7375 - Recall: 0.4462 - accuracy: 0.6042 - loss: 1.0709 - val_Precision: 0.7523 - val_Recall: 0.4808 - val_accuracy: 0.6277 - val_loss: 0.9908 Epoch 41/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - Precision: 0.7354 - Recall: 0.4469 - accuracy: 0.6041 - loss: 1.0674 - val_Precision: 0.7585 - val_Recall: 0.4769 - val_accuracy: 0.6302 - val_loss: 0.9901 Epoch 42/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 40s 5ms/step - Precision: 0.7375 - Recall: 0.4503 - accuracy: 0.6065 - loss: 1.0643 - val_Precision: 0.7648 - val_Recall: 0.4688 - val_accuracy: 0.6297 - val_loss: 0.9888 Epoch 43/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 40s 5ms/step - Precision: 0.7372 - Recall: 0.4441 - accuracy: 0.6014 - loss: 1.0726 - val_Precision: 0.7587 - val_Recall: 0.4789 - val_accuracy: 0.6302 - val_loss: 0.9901 Epoch 44/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7362 - Recall: 0.4492 - accuracy: 0.6052 - loss: 1.0663 - val_Precision: 0.7554 - val_Recall: 0.4776 - val_accuracy: 0.6285 - val_loss: 0.9899 Epoch 45/100 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.7377 - Recall: 0.4496 - accuracy: 0.6067 - loss: 1.0625 - val_Precision: 0.7662 - val_Recall: 0.4711 - val_accuracy: 0.6312 - val_loss: 0.9867 Training completed in 1231.25 seconds.
# Evaluate model 1 on the held-out test set; evaluate() returns metrics in
# compile order: [loss, accuracy, precision, recall].
results = model1_nn.evaluate(X_test, y_test, verbose=0)
for metric_name, metric_value in zip(('Accuracy', 'Precision', 'Recall'), results[1:]):
    print(f"Test {metric_name}: {metric_value:.4f}")
Test Accuracy: 0.6316 Test Precision: 0.7560 Test Recall: 0.4811
# Training-vs-validation curves for each metric tracked during model 1's fit.
# Each tuple: (history key for train, history key for validation, axis label)
metric_panels = [
    ('Precision', 'val_Precision', 'Precision'),
    ('Recall', 'val_Recall', 'Recall'),
    ('accuracy', 'val_accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Loss'),
]
plt.figure(figsize=(20, 5))
for position, (train_key, val_key, display) in enumerate(metric_panels, start=1):
    plt.subplot(1, 4, position)
    plt.plot(history1.history[train_key], label=f'Train {display}')
    plt.plot(history1.history[val_key], label=f'Validation {display}')
    plt.xlabel('Epoch')
    plt.ylabel(display)
    plt.title(f'{display} over Epochs')
    plt.legend()
plt.tight_layout()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# Predict class probabilities for the test set
y_pred_probs1 = model1_nn.predict(X_test)
# Collapse predicted probability vectors to class-index labels
y_pred1 = np.argmax(y_pred_probs1, axis=1)
# Convert one-hot true labels back to class indices
y_true1 = np.argmax(y_test, axis=1)
# Confusion matrix with human-readable genre-group names on the axes
cm1 = confusion_matrix(y_true1, y_pred1)
class_names = label_encoder.classes_
disp = ConfusionMatrixDisplay(confusion_matrix=cm1, display_labels=class_names)
fig, ax = plt.subplots(figsize=(8,8))
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()
1105/1105 ━━━━━━━━━━━━━━━━━━━━ 2s 1ms/step
Performance Metrics (Test Set):
Accuracy: 63.16%
Precision: 75.60%
Recall: 48.11%
These results indicate that the model is effective at predicting genre classes with high precision: of all the tracks the model assigned to a particular genre, approximately 75.6% were actually correct. However, the lower recall suggests that it misses many true positives.
The training/validation accuracy and precision curves show a consistent upward trend, with validation slightly outperforming training, which indicates good generalization.
# Build model 2: a simpler architecture with two hidden layers (256 -> 128),
# each followed by batch normalization and 30% dropout, and the same softmax
# output. An explicit Input layer avoids the Keras UserWarning raised when
# `input_shape=` is passed directly to the first Dense layer (see the
# warning emitted by the original code).
from tensorflow.keras.layers import Input

model2_nn = Sequential()
model2_nn.add(Input(shape=(input_shape,)))
# First hidden layer
model2_nn.add(Dense(256, activation='relu'))
model2_nn.add(BatchNormalization())
model2_nn.add(Dropout(0.3))
# Second hidden layer
model2_nn.add(Dense(128, activation='relu'))
model2_nn.add(BatchNormalization())
model2_nn.add(Dropout(0.3))
# Output layer: softmax over the genre groups
model2_nn.add(Dense(num_classes, activation='softmax'))
# Compile with the same optimizer/loss/metrics as model 1 for comparability
model2_nn.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'Precision', 'Recall']
)
/usr/local/lib/python3.11/dist-packages/keras/src/layers/core/dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(activity_regularizer=activity_regularizer, **kwargs)
start_time = time.time()  # Record the start time
# Train model 2 for up to 45 epochs (the point where model 1's run ended).
# NOTE(review): this reuses the same `early_stopping` callback instance as
# model 1 and the same test-set-as-validation setup — confirm the callback's
# state resets between fit() calls, and consider a separate validation split.
history2 = model2_nn.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=45,
    batch_size=32,
    verbose=1,
    callbacks=[early_stopping]
)
end_time = time.time()  # Record the end time
total_time = end_time - start_time
print(f"Training completed in {total_time:.2f} seconds.")
Epoch 1/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.6358 - Recall: 0.3529 - accuracy: 0.5106 - loss: 1.3504 - val_Precision: 0.7355 - val_Recall: 0.4145 - val_accuracy: 0.5959 - val_loss: 1.0827 Epoch 2/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7095 - Recall: 0.4040 - accuracy: 0.5719 - loss: 1.1471 - val_Precision: 0.7391 - val_Recall: 0.4484 - val_accuracy: 0.6100 - val_loss: 1.0467 Epoch 3/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7153 - Recall: 0.4179 - accuracy: 0.5831 - loss: 1.1179 - val_Precision: 0.7455 - val_Recall: 0.4520 - val_accuracy: 0.6140 - val_loss: 1.0371 Epoch 4/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - Precision: 0.7208 - Recall: 0.4210 - accuracy: 0.5841 - loss: 1.1116 - val_Precision: 0.7498 - val_Recall: 0.4546 - val_accuracy: 0.6182 - val_loss: 1.0263 Epoch 5/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 4ms/step - Precision: 0.7215 - Recall: 0.4302 - accuracy: 0.5922 - loss: 1.0939 - val_Precision: 0.7499 - val_Recall: 0.4538 - val_accuracy: 0.6164 - val_loss: 1.0262 Epoch 6/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.7240 - Recall: 0.4331 - accuracy: 0.5913 - loss: 1.0917 - val_Precision: 0.7505 - val_Recall: 0.4467 - val_accuracy: 0.6158 - val_loss: 1.0236 Epoch 7/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7286 - Recall: 0.4335 - accuracy: 0.5952 - loss: 1.0853 - val_Precision: 0.7394 - val_Recall: 0.4686 - val_accuracy: 0.6162 - val_loss: 1.0200 Epoch 8/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7299 - Recall: 0.4386 - accuracy: 0.5974 - loss: 1.0790 - val_Precision: 0.7491 - val_Recall: 0.4686 - val_accuracy: 0.6201 - val_loss: 1.0132 Epoch 9/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - Precision: 0.7291 - Recall: 0.4372 - accuracy: 0.5973 - loss: 1.0776 - val_Precision: 0.7496 - val_Recall: 0.4668 - val_accuracy: 0.6223 - val_loss: 1.0114 Epoch 10/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 4ms/step - Precision: 0.7295 - 
Recall: 0.4399 - accuracy: 0.5990 - loss: 1.0777 - val_Precision: 0.7503 - val_Recall: 0.4685 - val_accuracy: 0.6241 - val_loss: 1.0074 Epoch 11/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7324 - Recall: 0.4481 - accuracy: 0.6030 - loss: 1.0665 - val_Precision: 0.7545 - val_Recall: 0.4632 - val_accuracy: 0.6212 - val_loss: 1.0086 Epoch 12/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7312 - Recall: 0.4417 - accuracy: 0.5995 - loss: 1.0686 - val_Precision: 0.7439 - val_Recall: 0.4778 - val_accuracy: 0.6226 - val_loss: 1.0074 Epoch 13/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7335 - Recall: 0.4477 - accuracy: 0.6030 - loss: 1.0644 - val_Precision: 0.7584 - val_Recall: 0.4695 - val_accuracy: 0.6280 - val_loss: 0.9980 Epoch 14/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 42s 5ms/step - Precision: 0.7309 - Recall: 0.4444 - accuracy: 0.6007 - loss: 1.0663 - val_Precision: 0.7522 - val_Recall: 0.4723 - val_accuracy: 0.6259 - val_loss: 1.0003 Epoch 15/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7333 - Recall: 0.4482 - accuracy: 0.6025 - loss: 1.0613 - val_Precision: 0.7589 - val_Recall: 0.4685 - val_accuracy: 0.6272 - val_loss: 0.9981 Epoch 16/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7362 - Recall: 0.4491 - accuracy: 0.6042 - loss: 1.0572 - val_Precision: 0.7550 - val_Recall: 0.4750 - val_accuracy: 0.6282 - val_loss: 0.9950 Epoch 17/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 5ms/step - Precision: 0.7342 - Recall: 0.4483 - accuracy: 0.6040 - loss: 1.0596 - val_Precision: 0.7522 - val_Recall: 0.4712 - val_accuracy: 0.6250 - val_loss: 0.9980 Epoch 18/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 19s 4ms/step - Precision: 0.7326 - Recall: 0.4479 - accuracy: 0.6050 - loss: 1.0600 - val_Precision: 0.7513 - val_Recall: 0.4728 - val_accuracy: 0.6271 - val_loss: 1.0008 Epoch 19/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7372 - Recall: 0.4517 - accuracy: 0.6064 - loss: 1.0580 - val_Precision: 0.7483 
- val_Recall: 0.4831 - val_accuracy: 0.6265 - val_loss: 0.9973 Epoch 20/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.7343 - Recall: 0.4520 - accuracy: 0.6074 - loss: 1.0580 - val_Precision: 0.7575 - val_Recall: 0.4711 - val_accuracy: 0.6281 - val_loss: 0.9928 Epoch 21/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7370 - Recall: 0.4501 - accuracy: 0.6062 - loss: 1.0549 - val_Precision: 0.7530 - val_Recall: 0.4793 - val_accuracy: 0.6287 - val_loss: 0.9931 Epoch 22/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7363 - Recall: 0.4492 - accuracy: 0.6048 - loss: 1.0573 - val_Precision: 0.7524 - val_Recall: 0.4798 - val_accuracy: 0.6279 - val_loss: 0.9926 Epoch 23/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 4ms/step - Precision: 0.7373 - Recall: 0.4516 - accuracy: 0.6060 - loss: 1.0517 - val_Precision: 0.7629 - val_Recall: 0.4654 - val_accuracy: 0.6289 - val_loss: 0.9937 Epoch 24/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7370 - Recall: 0.4481 - accuracy: 0.6055 - loss: 1.0533 - val_Precision: 0.7596 - val_Recall: 0.4727 - val_accuracy: 0.6307 - val_loss: 0.9900 Epoch 25/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 23s 5ms/step - Precision: 0.7360 - Recall: 0.4526 - accuracy: 0.6069 - loss: 1.0506 - val_Precision: 0.7585 - val_Recall: 0.4755 - val_accuracy: 0.6292 - val_loss: 0.9896 Epoch 26/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7364 - Recall: 0.4527 - accuracy: 0.6070 - loss: 1.0499 - val_Precision: 0.7639 - val_Recall: 0.4716 - val_accuracy: 0.6306 - val_loss: 0.9900 Epoch 27/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.7359 - Recall: 0.4532 - accuracy: 0.6058 - loss: 1.0477 - val_Precision: 0.7585 - val_Recall: 0.4746 - val_accuracy: 0.6285 - val_loss: 0.9926 Epoch 28/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.7377 - Recall: 0.4533 - accuracy: 0.6086 - loss: 1.0504 - val_Precision: 0.7614 - val_Recall: 0.4701 - val_accuracy: 0.6324 - val_loss: 0.9880 Epoch 
29/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7381 - Recall: 0.4551 - accuracy: 0.6093 - loss: 1.0482 - val_Precision: 0.7598 - val_Recall: 0.4768 - val_accuracy: 0.6337 - val_loss: 0.9851 Epoch 30/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.7373 - Recall: 0.4578 - accuracy: 0.6113 - loss: 1.0428 - val_Precision: 0.7664 - val_Recall: 0.4643 - val_accuracy: 0.6289 - val_loss: 0.9931 Epoch 31/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 22s 4ms/step - Precision: 0.7384 - Recall: 0.4570 - accuracy: 0.6087 - loss: 1.0458 - val_Precision: 0.7604 - val_Recall: 0.4691 - val_accuracy: 0.6314 - val_loss: 0.9890 Epoch 32/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7397 - Recall: 0.4573 - accuracy: 0.6102 - loss: 1.0447 - val_Precision: 0.7583 - val_Recall: 0.4751 - val_accuracy: 0.6310 - val_loss: 0.9879 Epoch 33/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7395 - Recall: 0.4536 - accuracy: 0.6077 - loss: 1.0498 - val_Precision: 0.7583 - val_Recall: 0.4776 - val_accuracy: 0.6289 - val_loss: 0.9890 Epoch 34/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7358 - Recall: 0.4524 - accuracy: 0.6067 - loss: 1.0514 - val_Precision: 0.7623 - val_Recall: 0.4739 - val_accuracy: 0.6307 - val_loss: 0.9874 Epoch 35/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7387 - Recall: 0.4569 - accuracy: 0.6078 - loss: 1.0476 - val_Precision: 0.7607 - val_Recall: 0.4752 - val_accuracy: 0.6342 - val_loss: 0.9851 Epoch 36/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 20s 4ms/step - Precision: 0.7370 - Recall: 0.4553 - accuracy: 0.6070 - loss: 1.0464 - val_Precision: 0.7579 - val_Recall: 0.4741 - val_accuracy: 0.6314 - val_loss: 0.9888 Epoch 37/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7359 - Recall: 0.4540 - accuracy: 0.6084 - loss: 1.0479 - val_Precision: 0.7585 - val_Recall: 0.4819 - val_accuracy: 0.6328 - val_loss: 0.9848 Epoch 38/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 4ms/step - Precision: 0.7374 - 
Recall: 0.4561 - accuracy: 0.6091 - loss: 1.0466 - val_Precision: 0.7546 - val_Recall: 0.4885 - val_accuracy: 0.6323 - val_loss: 0.9835 Epoch 39/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7355 - Recall: 0.4569 - accuracy: 0.6091 - loss: 1.0465 - val_Precision: 0.7641 - val_Recall: 0.4739 - val_accuracy: 0.6326 - val_loss: 0.9851 Epoch 40/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 21s 5ms/step - Precision: 0.7398 - Recall: 0.4582 - accuracy: 0.6097 - loss: 1.0469 - val_Precision: 0.7576 - val_Recall: 0.4757 - val_accuracy: 0.6301 - val_loss: 0.9891 Epoch 41/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 41s 5ms/step - Precision: 0.7373 - Recall: 0.4541 - accuracy: 0.6094 - loss: 1.0451 - val_Precision: 0.7535 - val_Recall: 0.4887 - val_accuracy: 0.6337 - val_loss: 0.9818 Epoch 42/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 40s 5ms/step - Precision: 0.7392 - Recall: 0.4618 - accuracy: 0.6119 - loss: 1.0390 - val_Precision: 0.7588 - val_Recall: 0.4756 - val_accuracy: 0.6307 - val_loss: 0.9892 Epoch 43/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7398 - Recall: 0.4569 - accuracy: 0.6107 - loss: 1.0437 - val_Precision: 0.7602 - val_Recall: 0.4782 - val_accuracy: 0.6324 - val_loss: 0.9843 Epoch 44/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7407 - Recall: 0.4583 - accuracy: 0.6101 - loss: 1.0452 - val_Precision: 0.7604 - val_Recall: 0.4777 - val_accuracy: 0.6339 - val_loss: 0.9853 Epoch 45/45 4420/4420 ━━━━━━━━━━━━━━━━━━━━ 18s 4ms/step - Precision: 0.7374 - Recall: 0.4549 - accuracy: 0.6092 - loss: 1.0463 - val_Precision: 0.7607 - val_Recall: 0.4810 - val_accuracy: 0.6336 - val_loss: 0.9829 Training completed in 942.36 seconds.
# Evaluate model 2 on the held-out test set; evaluate() returns metrics in
# compile order: [loss, accuracy, precision, recall].
results = model2_nn.evaluate(X_test, y_test, verbose=0)
for metric_name, metric_value in zip(('Accuracy', 'Precision', 'Recall'), results[1:]):
    print(f"Test {metric_name}: {metric_value:.4f}")
Test Accuracy: 0.6342 Test Precision: 0.7607 Test Recall: 0.4752
# Training-vs-validation curves for each metric tracked during model 2's fit.
# Each tuple: (history key for train, history key for validation, axis label)
metric_panels = [
    ('Precision', 'val_Precision', 'Precision'),
    ('Recall', 'val_Recall', 'Recall'),
    ('accuracy', 'val_accuracy', 'Accuracy'),
    ('loss', 'val_loss', 'Loss'),
]
plt.figure(figsize=(20, 5))
for position, (train_key, val_key, display) in enumerate(metric_panels, start=1):
    plt.subplot(1, 4, position)
    plt.plot(history2.history[train_key], label=f'Train {display}')
    plt.plot(history2.history[val_key], label=f'Validation {display}')
    plt.xlabel('Epoch')
    plt.ylabel(display)
    plt.title(f'{display} over Epochs')
    plt.legend()
plt.tight_layout()
plt.show()
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# Predict class probabilities for the test set
y_pred_probs2 = model2_nn.predict(X_test)
# Collapse predicted probability vectors to class-index labels
y_pred2 = np.argmax(y_pred_probs2, axis=1)
# Convert one-hot true labels back to class indices
y_true2= np.argmax(y_test, axis=1)
# Confusion matrix with human-readable genre-group names on the axes
cm2 = confusion_matrix(y_true2, y_pred2)
class_names = label_encoder.classes_
disp = ConfusionMatrixDisplay(confusion_matrix=cm2, display_labels=class_names)
fig, ax = plt.subplots(figsize=(8,8))
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()
1105/1105 ━━━━━━━━━━━━━━━━━━━━ 2s 1ms/step
Performance Summary (Test Set):
Accuracy: 63.42%
Precision: 76.07%
Recall: 47.52%
Compared with model 1, these values show no noticeable improvement in accuracy, and precision and recall are marginally lower. This indicates that reducing the number of hidden layers from three to two in Model 2 did not lead to a significant performance change. The minimal difference suggests that the third hidden layer in Model 1 may not have contributed enough additional learning capacity to justify its computational cost. Model 2 thus achieves comparable performance with a simpler architecture, making it a more efficient alternative.
import pandas as pd

# Balance the classes by random undersampling: every class is cut down to the
# size of the rarest one so training is not dominated by majority genres.
data_combined = pd.concat([pd.DataFrame(X), pd.Series(y_encoded, name='label')], axis=1)
min_count = data_combined['label'].value_counts().min()
# GroupBy.sample replaces the deprecated groupby().apply(lambda x: x.sample(...))
# pattern (which raised a DeprecationWarning about operating on grouping
# columns) and keeps the 'label' column intact.
downsampled = (
    data_combined.groupby('label', group_keys=False)
    .sample(n=min_count, random_state=42)
    .reset_index(drop=True)
)
# Separate features and labels
X_downsampled = downsampled.drop('label', axis=1).values
y_downsampled = downsampled['label'].values
# One-hot encode for the softmax / categorical-crossentropy head.
y_downsampled_categorical = to_categorical(y_downsampled)
# Split into train/test, stratified so the (now uniform) balance is preserved.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X_downsampled, y_downsampled_categorical,
    test_size=0.2,
    random_state=42,
    stratify=y_downsampled,
)
<ipython-input-33-761a065d90de>:11: DeprecationWarning: DataFrameGroupBy.apply operated on the grouping columns. This behavior is deprecated, and in a future version of pandas the grouping columns will be excluded from the operation. Either pass `include_groups=False` to exclude the groupings or explicitly select the grouping columns after groupby to silence this warning.
downsampled = data_combined.groupby('label').apply(lambda x: x.sample(min_count, random_state=42)).reset_index(drop=True)
# Dimensions inferred from the prepared training arrays.
input_shape = X_train.shape[1]
num_classes = y_train.shape[1]

# Model 4: two hidden layers (256 -> 128 units), each followed by batch
# normalization and 20% dropout, with a softmax output over all genre groups.
model4_nn = Sequential([
    Dense(256, activation='relu', input_shape=(input_shape,)),
    BatchNormalization(),
    Dropout(0.2),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])

# Multiclass cross-entropy with Adam; track accuracy plus precision/recall.
model4_nn.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'Precision', 'Recall'],
)
/usr/local/lib/python3.11/dist-packages/keras/src/layers/core/dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(activity_regularizer=activity_regularizer, **kwargs)
# Score model 4 on the held-out test split (metrics follow compile order).
results = model4_nn.evaluate(X_test, y_test, verbose=0)
for metric_name, metric_value in zip(("Accuracy", "Precision", "Recall"), results[1:4]):
    print(f"Test {metric_name}: {metric_value:.4f}")
Test Accuracy: 0.6468 Test Precision: 0.7621 Test Recall: 0.5098
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay

# Confusion matrix for model 4 (trained on the undersampled data).
y_pred_probs4 = model4_nn.predict(X_test)
y_pred4 = np.argmax(y_pred_probs4, axis=1)     # softmax -> predicted class id
y_true4 = np.argmax(y_test, axis=1)            # one-hot -> true class id
cm4 = confusion_matrix(y_true4, y_pred4)
class_names = label_encoder.classes_
fig, ax = plt.subplots(figsize=(8, 8))
disp = ConfusionMatrixDisplay(confusion_matrix=cm4, display_labels=class_names)
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()
800/800 ━━━━━━━━━━━━━━━━━━━━ 2s 2ms/step
# Model 5: same two-hidden-layer template as model 4 but with a narrower
# first layer (64 -> 128 units), to probe the effect of reduced capacity.
model5_nn = Sequential([
    Dense(64, activation='relu', input_shape=(input_shape,)),
    BatchNormalization(),
    Dropout(0.2),
    Dense(128, activation='relu'),
    BatchNormalization(),
    Dropout(0.2),
    Dense(num_classes, activation='softmax'),
])

# Identical training configuration to model 4 for a fair comparison.
model5_nn.compile(
    optimizer=Adam(learning_rate=0.001),
    loss='categorical_crossentropy',
    metrics=['accuracy', 'Precision', 'Recall'],
)
import time

# Fit model 5 under early stopping; the test split doubles as validation data.
start_time = time.time()
history5 = model5_nn.fit(
    X_train, y_train,
    validation_data=(X_test, y_test),
    epochs=50,
    batch_size=128,
    verbose=1,
    callbacks=[early_stopping],
)
elapsed = time.time() - start_time
print(f"Training completed in {elapsed:.2f} seconds.")
Epoch 1/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 27s 6ms/step - Precision: 0.6216 - Recall: 0.3058 - accuracy: 0.4706 - loss: 1.4942 - val_Precision: 0.7343 - val_Recall: 0.4148 - val_accuracy: 0.5963 - val_loss: 1.1150 Epoch 2/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7062 - Recall: 0.3935 - accuracy: 0.5677 - loss: 1.1876 - val_Precision: 0.7353 - val_Recall: 0.4387 - val_accuracy: 0.6056 - val_loss: 1.0889 Epoch 3/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7132 - Recall: 0.4090 - accuracy: 0.5790 - loss: 1.1580 - val_Precision: 0.7356 - val_Recall: 0.4479 - val_accuracy: 0.6092 - val_loss: 1.0726 Epoch 4/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 6ms/step - Precision: 0.7204 - Recall: 0.4263 - accuracy: 0.5897 - loss: 1.1294 - val_Precision: 0.7428 - val_Recall: 0.4513 - val_accuracy: 0.6135 - val_loss: 1.0586 Epoch 5/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7252 - Recall: 0.4347 - accuracy: 0.5944 - loss: 1.1144 - val_Precision: 0.7410 - val_Recall: 0.4614 - val_accuracy: 0.6152 - val_loss: 1.0484 Epoch 6/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 4ms/step - Precision: 0.7244 - Recall: 0.4404 - accuracy: 0.5969 - loss: 1.1076 - val_Precision: 0.7486 - val_Recall: 0.4576 - val_accuracy: 0.6179 - val_loss: 1.0386 Epoch 7/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 6ms/step - Precision: 0.7252 - Recall: 0.4435 - accuracy: 0.5986 - loss: 1.1010 - val_Precision: 0.7469 - val_Recall: 0.4702 - val_accuracy: 0.6182 - val_loss: 1.0325 Epoch 8/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7297 - Recall: 0.4534 - accuracy: 0.6050 - loss: 1.0832 - val_Precision: 0.7425 - val_Recall: 0.4805 - val_accuracy: 0.6218 - val_loss: 1.0263 Epoch 9/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 6ms/step - Precision: 0.7273 - Recall: 0.4508 - accuracy: 0.6037 - loss: 1.0911 - val_Precision: 0.7504 - val_Recall: 0.4662 - val_accuracy: 0.6224 - val_loss: 1.0252 Epoch 10/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 8s 10ms/step - Precision: 0.7318 - Recall: 0.4539 - accuracy: 
0.6064 - loss: 1.0815 - val_Precision: 0.7458 - val_Recall: 0.4734 - val_accuracy: 0.6221 - val_loss: 1.0230 Epoch 11/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - Precision: 0.7281 - Recall: 0.4509 - accuracy: 0.6019 - loss: 1.0864 - val_Precision: 0.7483 - val_Recall: 0.4766 - val_accuracy: 0.6261 - val_loss: 1.0171 Epoch 12/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7315 - Recall: 0.4528 - accuracy: 0.6046 - loss: 1.0794 - val_Precision: 0.7506 - val_Recall: 0.4770 - val_accuracy: 0.6273 - val_loss: 1.0147 Epoch 13/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 6ms/step - Precision: 0.7316 - Recall: 0.4570 - accuracy: 0.6072 - loss: 1.0730 - val_Precision: 0.7498 - val_Recall: 0.4758 - val_accuracy: 0.6255 - val_loss: 1.0150 Epoch 14/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7333 - Recall: 0.4583 - accuracy: 0.6075 - loss: 1.0705 - val_Precision: 0.7423 - val_Recall: 0.4868 - val_accuracy: 0.6233 - val_loss: 1.0146 Epoch 15/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7327 - Recall: 0.4636 - accuracy: 0.6108 - loss: 1.0649 - val_Precision: 0.7478 - val_Recall: 0.4816 - val_accuracy: 0.6273 - val_loss: 1.0115 Epoch 16/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - Precision: 0.7341 - Recall: 0.4627 - accuracy: 0.6121 - loss: 1.0643 - val_Precision: 0.7512 - val_Recall: 0.4791 - val_accuracy: 0.6279 - val_loss: 1.0049 Epoch 17/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - Precision: 0.7340 - Recall: 0.4653 - accuracy: 0.6093 - loss: 1.0647 - val_Precision: 0.7465 - val_Recall: 0.4917 - val_accuracy: 0.6275 - val_loss: 1.0051 Epoch 18/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7341 - Recall: 0.4655 - accuracy: 0.6141 - loss: 1.0644 - val_Precision: 0.7454 - val_Recall: 0.4947 - val_accuracy: 0.6295 - val_loss: 1.0029 Epoch 19/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - Precision: 0.7326 - Recall: 0.4659 - accuracy: 0.6115 - loss: 1.0632 - val_Precision: 0.7471 - val_Recall: 0.4893 - val_accuracy: 0.6296 - 
val_loss: 1.0028 Epoch 20/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 4ms/step - Precision: 0.7341 - Recall: 0.4696 - accuracy: 0.6149 - loss: 1.0560 - val_Precision: 0.7503 - val_Recall: 0.4868 - val_accuracy: 0.6286 - val_loss: 1.0030 Epoch 21/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7310 - Recall: 0.4649 - accuracy: 0.6132 - loss: 1.0623 - val_Precision: 0.7540 - val_Recall: 0.4817 - val_accuracy: 0.6297 - val_loss: 1.0011 Epoch 22/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 6s 5ms/step - Precision: 0.7366 - Recall: 0.4706 - accuracy: 0.6163 - loss: 1.0497 - val_Precision: 0.7542 - val_Recall: 0.4833 - val_accuracy: 0.6291 - val_loss: 0.9997 Epoch 23/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - Precision: 0.7345 - Recall: 0.4673 - accuracy: 0.6146 - loss: 1.0574 - val_Precision: 0.7547 - val_Recall: 0.4816 - val_accuracy: 0.6305 - val_loss: 0.9994 Epoch 24/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 4ms/step - Precision: 0.7376 - Recall: 0.4675 - accuracy: 0.6128 - loss: 1.0524 - val_Precision: 0.7468 - val_Recall: 0.4920 - val_accuracy: 0.6275 - val_loss: 0.9991 Epoch 25/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7354 - Recall: 0.4681 - accuracy: 0.6118 - loss: 1.0557 - val_Precision: 0.7531 - val_Recall: 0.4822 - val_accuracy: 0.6290 - val_loss: 0.9998 Epoch 26/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 6s 5ms/step - Precision: 0.7365 - Recall: 0.4711 - accuracy: 0.6142 - loss: 1.0504 - val_Precision: 0.7544 - val_Recall: 0.4850 - val_accuracy: 0.6299 - val_loss: 0.9985 Epoch 27/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7389 - Recall: 0.4717 - accuracy: 0.6145 - loss: 1.0509 - val_Precision: 0.7458 - val_Recall: 0.4946 - val_accuracy: 0.6314 - val_loss: 0.9966 Epoch 28/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 6s 5ms/step - Precision: 0.7355 - Recall: 0.4715 - accuracy: 0.6162 - loss: 1.0507 - val_Precision: 0.7522 - val_Recall: 0.4889 - val_accuracy: 0.6326 - val_loss: 0.9941 Epoch 29/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 5ms/step - Precision: 0.7334 - Recall: 
0.4678 - accuracy: 0.6122 - loss: 1.0591 - val_Precision: 0.7526 - val_Recall: 0.4894 - val_accuracy: 0.6314 - val_loss: 0.9930 Epoch 30/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7370 - Recall: 0.4708 - accuracy: 0.6141 - loss: 1.0499 - val_Precision: 0.7562 - val_Recall: 0.4843 - val_accuracy: 0.6328 - val_loss: 0.9943 Epoch 31/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 8s 8ms/step - Precision: 0.7367 - Recall: 0.4713 - accuracy: 0.6152 - loss: 1.0460 - val_Precision: 0.7548 - val_Recall: 0.4825 - val_accuracy: 0.6309 - val_loss: 0.9993 Epoch 32/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 4s 4ms/step - Precision: 0.7349 - Recall: 0.4707 - accuracy: 0.6167 - loss: 1.0460 - val_Precision: 0.7493 - val_Recall: 0.4924 - val_accuracy: 0.6310 - val_loss: 0.9928 Epoch 33/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 4ms/step - Precision: 0.7400 - Recall: 0.4733 - accuracy: 0.6175 - loss: 1.0463 - val_Precision: 0.7506 - val_Recall: 0.4860 - val_accuracy: 0.6316 - val_loss: 0.9939 Epoch 34/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 8s 10ms/step - Precision: 0.7386 - Recall: 0.4749 - accuracy: 0.6176 - loss: 1.0436 - val_Precision: 0.7534 - val_Recall: 0.4864 - val_accuracy: 0.6308 - val_loss: 0.9941 Epoch 35/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 9s 11ms/step - Precision: 0.7376 - Recall: 0.4730 - accuracy: 0.6166 - loss: 1.0476 - val_Precision: 0.7560 - val_Recall: 0.4833 - val_accuracy: 0.6299 - val_loss: 0.9963 Epoch 36/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 6ms/step - Precision: 0.7396 - Recall: 0.4735 - accuracy: 0.6168 - loss: 1.0457 - val_Precision: 0.7447 - val_Recall: 0.4989 - val_accuracy: 0.6314 - val_loss: 0.9935 Epoch 37/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7372 - Recall: 0.4747 - accuracy: 0.6169 - loss: 1.0430 - val_Precision: 0.7560 - val_Recall: 0.4839 - val_accuracy: 0.6328 - val_loss: 0.9910 Epoch 38/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 9s 11ms/step - Precision: 0.7392 - Recall: 0.4744 - accuracy: 0.6181 - loss: 1.0434 - val_Precision: 0.7551 - val_Recall: 0.4824 - 
val_accuracy: 0.6340 - val_loss: 0.9940 Epoch 39/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 8s 9ms/step - Precision: 0.7357 - Recall: 0.4710 - accuracy: 0.6154 - loss: 1.0438 - val_Precision: 0.7536 - val_Recall: 0.4878 - val_accuracy: 0.6322 - val_loss: 0.9901 Epoch 40/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 11s 10ms/step - Precision: 0.7380 - Recall: 0.4783 - accuracy: 0.6199 - loss: 1.0401 - val_Precision: 0.7512 - val_Recall: 0.4888 - val_accuracy: 0.6310 - val_loss: 0.9946 Epoch 41/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 8s 10ms/step - Precision: 0.7367 - Recall: 0.4688 - accuracy: 0.6153 - loss: 1.0484 - val_Precision: 0.7514 - val_Recall: 0.4920 - val_accuracy: 0.6325 - val_loss: 0.9920 Epoch 42/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 5s 6ms/step - Precision: 0.7373 - Recall: 0.4715 - accuracy: 0.6182 - loss: 1.0408 - val_Precision: 0.7526 - val_Recall: 0.4904 - val_accuracy: 0.6329 - val_loss: 0.9927 Epoch 43/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7378 - Recall: 0.4736 - accuracy: 0.6166 - loss: 1.0449 - val_Precision: 0.7500 - val_Recall: 0.4960 - val_accuracy: 0.6325 - val_loss: 0.9875 Epoch 44/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - Precision: 0.7375 - Recall: 0.4733 - accuracy: 0.6145 - loss: 1.0417 - val_Precision: 0.7560 - val_Recall: 0.4893 - val_accuracy: 0.6343 - val_loss: 0.9901 Epoch 45/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 8s 10ms/step - Precision: 0.7410 - Recall: 0.4758 - accuracy: 0.6193 - loss: 1.0356 - val_Precision: 0.7511 - val_Recall: 0.4964 - val_accuracy: 0.6331 - val_loss: 0.9880 Epoch 46/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 8s 6ms/step - Precision: 0.7390 - Recall: 0.4777 - accuracy: 0.6211 - loss: 1.0444 - val_Precision: 0.7537 - val_Recall: 0.4934 - val_accuracy: 0.6330 - val_loss: 0.9911 Epoch 47/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 11s 8ms/step - Precision: 0.7412 - Recall: 0.4787 - accuracy: 0.6192 - loss: 1.0378 - val_Precision: 0.7586 - val_Recall: 0.4893 - val_accuracy: 0.6351 - val_loss: 0.9865 Epoch 48/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 11s 9ms/step - 
Precision: 0.7401 - Recall: 0.4760 - accuracy: 0.6196 - loss: 1.0434 - val_Precision: 0.7539 - val_Recall: 0.4896 - val_accuracy: 0.6339 - val_loss: 0.9906 Epoch 49/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 6s 4ms/step - Precision: 0.7428 - Recall: 0.4782 - accuracy: 0.6211 - loss: 1.0363 - val_Precision: 0.7536 - val_Recall: 0.4950 - val_accuracy: 0.6347 - val_loss: 0.9868 Epoch 50/50 800/800 ━━━━━━━━━━━━━━━━━━━━ 6s 7ms/step - Precision: 0.7398 - Recall: 0.4778 - accuracy: 0.6214 - loss: 1.0395 - val_Precision: 0.7546 - val_Recall: 0.4918 - val_accuracy: 0.6343 - val_loss: 0.9879 Training completed in 300.60 seconds.
# Score model 5 on the held-out test split (metrics follow compile order).
results = model5_nn.evaluate(X_test, y_test, verbose=0)
for metric_name, metric_value in zip(("Accuracy", "Precision", "Recall"), results[1:4]):
    print(f"Test {metric_name}: {metric_value:.4f}")
Test Accuracy: 0.6351 Test Precision: 0.7586 Test Recall: 0.4893
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
# Predict on the test set with MODEL 5. The original cell mistakenly called
# model4_nn.predict and then argmax'd y_pred_probs4, so it re-plotted model
# 4's confusion matrix under model 5's heading.
y_pred_probs5 = model5_nn.predict(X_test)
# Convert softmax probabilities to predicted class labels
y_pred5 = np.argmax(y_pred_probs5, axis=1)
# Convert one-hot true labels to class labels
y_true5 = np.argmax(y_test, axis=1)
cm5 = confusion_matrix(y_true5, y_pred5)
class_names = label_encoder.classes_
disp = ConfusionMatrixDisplay(confusion_matrix=cm5, display_labels=class_names)
fig, ax = plt.subplots(figsize=(8, 8))
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()
800/800 ━━━━━━━━━━━━━━━━━━━━ 1s 1ms/step
from sklearn.metrics import classification_report

# Per-class reports side by side: model 2 (full data) vs model 4 (undersampled).
report_a = classification_report(y_true2, y_pred2, target_names=label_encoder.classes_)
report_b = classification_report(y_true4, y_pred4, target_names=label_encoder.classes_)
for heading, report in (
    ("Model A Classification Report:", report_a),
    ("Model B Classification Report with Undersampling:", report_b),
):
    print(f"{heading}\n", report)
Model A Classification Report:
precision recall f1-score support
Children/Anime 0.71 0.55 0.62 3135
Classical/Opera 0.76 0.83 0.79 3398
Country/Folk 0.50 0.55 0.52 3086
Dance/Electronic 0.65 0.72 0.68 8477
Hip-Hop/Rap/R&B 0.59 0.61 0.60 2845
Jazz/Blues 0.53 0.51 0.52 3307
Movie/Comedy 0.83 0.75 0.79 3495
Pop/Rock 0.47 0.45 0.46 4321
World/Soundtrack 0.68 0.64 0.66 3291
accuracy 0.63 35355
macro avg 0.64 0.62 0.63 35355
weighted avg 0.64 0.63 0.63 35355
Model B Classification Report with Undersampling:
precision recall f1-score support
Children/Anime 0.76 0.60 0.67 2844
Classical/Opera 0.75 0.85 0.79 2844
Country/Folk 0.53 0.62 0.57 2845
Dance/Electronic 0.59 0.49 0.54 2845
Hip-Hop/Rap/R&B 0.62 0.80 0.70 2845
Jazz/Blues 0.58 0.58 0.58 2844
Movie/Comedy 0.84 0.79 0.82 2845
Pop/Rock 0.48 0.43 0.45 2844
World/Soundtrack 0.69 0.65 0.67 2844
accuracy 0.65 25600
macro avg 0.65 0.65 0.64 25600
weighted avg 0.65 0.65 0.64 25600
Performance Summary (Test Set) for model 4(2 hidden layers with 256 and 128 units):
Accuracy: 64.68%
Precision: 76.21%
Recall: 50.98%
These results mark a notable improvement in recall, while accuracy and precision slightly exceed those of the previous models. The higher recall indicates the model became more sensitive to identifying correct genre labels across all classes, likely due to the balanced class distribution during training. Undersampling the data led to better generalization across all genre classes, especially those that originally had fewer samples.
Even the confusion matrix shows predictions are more evenly distributed across genre classes, with less dominance by majority class Dance/Electronic.
Performance Summary (Test Set) for model 5(2 hidden layers with 64 and 128 units):
Accuracy: 63.51%
Precision: 75.86%
Recall: 48.93%
Compared to Model 4, Model 5 demonstrated a slight decline in all performance metrics. This suggests that reducing the capacity of the first hidden layer to 64 units may have limited the model’s ability to effectively learn complex feature patterns, resulting in lower generalization performance.
Among all models evaluated, the neural network trained on balanced (undersampled) data with two hidden layers of sizes 256 and 128 (Model 4) achieved the best overall results. This makes it the most effective architecture for multiclass genre classification in this project.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
from sklearn.preprocessing import label_binarize
# One-vs-rest ROC/AUC per genre class, using model 4's predicted probabilities.
y_true = y_test
n_classes = y_true.shape[1]
fpr = dict()
tpr = dict()
roc_auc = dict()
for i in range(n_classes):
    fpr[i], tpr[i], _ = roc_curve(y_true[:, i], y_pred_probs4[:, i])
    roc_auc[i] = auc(fpr[i], tpr[i])
plt.figure(figsize=(10, 8))
colors = ['aqua', 'darkorange', 'cornflowerblue', 'green', 'red',
          'purple', 'brown', 'pink', 'grey', 'olive', 'gold', 'black']
for i, color in zip(range(n_classes), colors):
    plt.plot(fpr[i], tpr[i], color=color, lw=2,
             label=f'ROC curve for {label_encoder.classes_[i]} (area = {roc_auc[i]:0.2f})')
plt.plot([0, 1], [0, 1], 'k--', lw=2)  # chance-level diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
# Fixed title: previously said "per Species" (copy-paste from another dataset);
# the classes here are music genres.
plt.title('AUC-ROC Curves per Genre')
plt.legend(loc='lower right')
plt.grid(True)
plt.show()
From the above plot, the AUC (Area Under Curve) values range from 0.89 to 0.98, which indicates that the model performs very well across all genre classes.
Classical/Opera and Movie/Comedy achieved the highest AUC values of 0.98, suggesting that the model is highly effective in distinguishing these genres from the others. The lowest AUC was for Pop/Rock (0.89), indicating that this genre is the most challenging to classify.
Conventional Methods¶
# Features and grouped-genre target for the conventional (tree-based) models.
df_multi = X.copy()
y_multi = spotify_df['genre_grouped']
# Stratified 80/20 split keeps the genre distribution identical in both parts.
X_train, X_test, y_train, y_test = train_test_split(
    df_multi,
    y_multi,
    test_size=0.2,
    random_state=42,
    stratify=y_multi,
)
Decision Trees¶
# 5-fold shuffled CV for the decision-tree hyperparameter search.
cv = KFold(n_splits=5, shuffle=True, random_state=5322)

# Candidate grid; the deepest max_depth equals the number of features.
param_grid = {
    'max_depth': [3, 5, 6, X_train.shape[1]],
    'min_samples_split': [2, 3, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'class_weight': [None, 'balanced'],
}

multi_dt = DecisionTreeClassifier(random_state=5322)
grid_search = GridSearchCV(
    estimator=multi_dt,
    param_grid=param_grid,
    cv=cv,
    scoring='f1_weighted',  # weighted F1 copes with the class imbalance
    n_jobs=-1,
    verbose=1,
)

start_time = time.time()
grid_search.fit(X_train, y_train)
training_duration = time.time() - start_time
print(f"Training time (Decision Tree with GridSearchCV): {training_duration:.2f} seconds")
Fitting 5 folds for each of 96 candidates, totalling 480 fits Training time (Decision Tree with GridSearchCV): 56.89 seconds
# Inspect the hyperparameter combination selected by the grid search.
grid_search.best_params_
{'class_weight': None,
'max_depth': 11,
'min_samples_leaf': 4,
'min_samples_split': 2}
# Evaluate the refit grid-search winner on the held-out test split.
best_multi_dt = grid_search.best_estimator_
binary_multi_pred = best_multi_dt.predict(X_test)
print(classification_report(y_test, binary_multi_pred))
precision recall f1-score support
Children/Anime 0.64 0.49 0.56 3135
Classical/Opera 0.74 0.78 0.76 3398
Country/Folk 0.43 0.52 0.47 3086
Dance/Electronic 0.60 0.65 0.62 8477
Hip-Hop/Rap/R&B 0.58 0.55 0.56 2845
Jazz/Blues 0.45 0.38 0.41 3307
Movie/Comedy 0.81 0.72 0.76 3495
Pop/Rock 0.42 0.45 0.44 4321
World/Soundtrack 0.62 0.58 0.60 3291
accuracy 0.58 35355
macro avg 0.59 0.57 0.58 35355
weighted avg 0.59 0.58 0.58 35355
# Human-readable genre-group labels in the encoder's (alphabetical) order.
class_names = [
    "Children/Anime",
    "Classical/Opera",
    "Country/Folk",
    "Dance/Electronic",
    "Hip-Hop/Rap/R&B",
    "Jazz/Blues",
    "Movie/Comedy",
    "Pop/Rock",
    "World/Soundtrack",
]
# Confusion matrix for the tuned decision tree.
cm_dt = confusion_matrix(y_test, binary_multi_pred)
fig, ax = plt.subplots(figsize=(8, 8))
disp = ConfusionMatrixDisplay(confusion_matrix=cm_dt, display_labels=class_names)
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()
# Top-10 variable importances (%) for the tuned decision tree.
# (The original comment said "boosting model" — this is the decision tree.)
feature_importance = best_multi_dt.feature_importances_ * 100
rel_imp = pd.Series(feature_importance, index=numeric_features).sort_values()
rel_imp.tail(10).plot(kind='barh')
plt.xlabel('Variable importance')
plt.title('Variable importance for Decision Tree model')
Text(0.5, 1.0, 'Variable importance for Decision Tree model')
# Render the full tuned tree (a large canvas — the selected depth of 11
# makes the tree very wide at the leaves).
plt.figure(figsize=(50, 50))
plot_tree(
    best_multi_dt,
    filled=True,
    feature_names=numeric_features,
    label='all',
    fontsize=12,
)
plt.show()
The Decision Tree model was trained to classify tracks into one of nine grouped genres using musical features such as tempo, loudness, acousticness, and danceability. Hyperparameters 'max_depth', 'min_samples_split', 'min_samples_leaf' were optimized using a grid search with 5-fold cross-validation.
Best Model Parameters: max_depth: 11 min_samples_split: 2 min_samples_leaf: 4 class_weight: None
Overall Performance: Accuracy: 58%
Children/Anime - Of all the tracks the model labeled as Children/Anime, 64% were actually from this genre. However, it only managed to correctly identify 49% of all actual Children/Anime tracks. So it's fairly precise but misses many true ones.
Classical/Opera - Of all the tracks labeled Classical/Opera, 74% were correct. And it captured 78% of all true Classical/Opera tracks. This is one of the most reliable genres in terms of prediction.
Country/Folk - Only 43% of tracks predicted as Country/Folk were correct meaning it's often confused with other genres. It found 52% of actual Country/Folk tracks. Both precision and recall are low, indicating confusion with similar genres.
Dance/Electronic - When predicting Dance/Electronic, 60% of those predictions were correct, and it captured 65% of true instances. This is decent, showing good model understanding for this genre.
Hip-Hop/Rap/R&B - Of all the tracks predicted as Hip-Hop/Rap/R&B, 58% were correct, and it found 55% of the actual ones. Performance here is average, with moderate false positives and false negatives.
Jazz/Blues - Only 45% of the predicted Jazz/Blues tracks were truly from this genre, and just 38% of actual Jazz/Blues tracks were identified. This is one of the weakest genres, indicating heavy misclassification.
Movie/Comedy - 81% of the tracks labeled as Movie/Comedy were correct showing the model is very confident when it predicts this genre. It also identified 72% of all actual Movie/Comedy tracks. This is a very strong performing class.
Pop/Rock - Only 42% of the tracks predicted as Pop/Rock were correct, and the model found 45% of the true Pop/Rock tracks. Performance is weak, possibly due to similarity with Country or Dance genres.
World/Soundtrack - Of all tracks labeled World/Soundtrack, 62% were correct, and it captured 58% of actual ones. This is a moderately well-performing class, with some confusion likely with Classical or Movie genres.
Random Forest¶
# Random Forest with manually chosen hyperparameters.
# A GridSearchCV sweep was explored earlier; the grid is kept for reference:
# param_grid = {
#     'n_estimators': [150, 200, 250],
#     'max_depth': [4, 5, 6],
#     'min_samples_split': [2, 5],
#     'min_samples_leaf': [1, 2],
#     'max_features': [0.5],
# }
# grid_search = GridSearchCV(
#     estimator=rf,
#     param_grid=param_grid,
#     cv=5,
#     scoring='f1_weighted',
#     n_jobs=-1,
#     verbose=1,
# )
rf = RandomForestClassifier(n_estimators=200, max_depth=6, max_features=6, random_state=42)
start_time = time.time()
rf.fit(X_train, y_train)
training_duration = time.time() - start_time
print(f"Training time (RandomForest): {training_duration:.2f} seconds")
Training time (RandomForest): 38.42 seconds
# Per-class metrics for the random forest on the test split.
y_pred_best = rf.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred_best))
Classification Report:
precision recall f1-score support
Children/Anime 0.58 0.39 0.47 3135
Classical/Opera 0.67 0.82 0.74 3398
Country/Folk 0.42 0.38 0.40 3086
Dance/Electronic 0.50 0.74 0.60 8477
Hip-Hop/Rap/R&B 0.58 0.48 0.52 2845
Jazz/Blues 0.45 0.17 0.25 3307
Movie/Comedy 0.90 0.62 0.73 3495
Pop/Rock 0.40 0.47 0.43 4321
World/Soundtrack 0.62 0.50 0.55 3291
accuracy 0.54 35355
macro avg 0.57 0.51 0.52 35355
weighted avg 0.56 0.54 0.53 35355
# Top-10 variable importances (%) for the random forest.
feature_importance = rf.feature_importances_ * 100
rel_imp = pd.Series(feature_importance, index=numeric_features).sort_values(ascending=True)
rel_imp.tail(10).plot(kind='barh')
plt.xlabel('Variable importance')
# Fixed title: previously said "Decision Tree model" (copy-paste error) —
# this chart shows the Random Forest's importances.
plt.title('Variable importance for Random Forest model')
Text(0.5, 1.0, 'Variable importance for Decision Tree model')
# Confusion matrix for the random forest.
cm_rf = confusion_matrix(y_test, y_pred_best)
fig, ax = plt.subplots(figsize=(8, 8))
disp = ConfusionMatrixDisplay(confusion_matrix=cm_rf, display_labels=class_names)
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()
# Numbered class labels for the tree plot legend.
class_names1 = [
    "1: Children/Anime",
    "2: Classical/Opera",
    "3: Country/Folk",
    "4: Dance/Electronic",
    "5: Hip-Hop/Rap/R&B",
    "6: Jazz/Blues",
    "7: Movie/Comedy",
    "8: Pop/Rock",
    "9: World/Soundtrack",
]
# Visualize the forest's first estimator (all trees are depth-limited to 6).
fig = plt.figure(figsize=(20, 15))
plot_tree(rf.estimators_[0], feature_names=numeric_features, class_names=class_names1, filled=True, rounded=True, fontsize=5)
plt.title("Random Forest - Tree 0 (depth limited to 6)")
plt.show()
# Fixed output name: the file was previously saved as "rf_tree_depth4.pdf",
# contradicting the max_depth=6 model and the plot title above.
fig.savefig("rf_tree_depth6.pdf", bbox_inches='tight')
plt.close(fig)
Best Model Parameters: n_estimators = 200, max_depth = 6, max_features=6
Model Performance: Accuracy - 54%
Higher precision in Movie/Comedy (90%) and higher recall in Dance/Electronic (74%) and Classical/Opera (82%). This implies that when the model predicts a track as Movie/Comedy it is correct about 9 out of 10 times, even though the dataset contains Dance/Electronic as the majority class. Similarly, the model successfully retrieves most of the actual tracks belonging to the Dance/Electronic and Classical/Opera genres.
But overall, recall drops sharply in many genres especially Jazz/Blues.
Gradient Boosting¶
# Gradient Boosting with manually chosen hyperparameters.
# A GridSearchCV sweep was explored earlier; the grid is kept for reference:
# param_grid = {
#     'n_estimators': [150, 200, 250],
#     'max_depth': [4, 5, 6],
#     'min_samples_split': [2, 5],
#     'min_samples_leaf': [1, 2],
#     'learning_rate': [0.01, 0.05, 0.1],
# }
# grid_search = GridSearchCV(
#     estimator=Tree_Bst_reg,
#     param_grid=param_grid,
#     cv=5,
#     scoring='f1_weighted',
#     n_jobs=-1,
#     verbose=1,
# )
Tree_Bst_reg = GradientBoostingClassifier(n_estimators=250, learning_rate=0.1, max_depth=4, random_state=1)
start_time = time.time()
Tree_Bst_reg.fit(X_train, y_train)
training_duration = time.time() - start_time
print(f"Training time (GradientBoosting ): {training_duration:.2f} seconds")
Training time (GradientBoosting ): 881.50 seconds
# Per-class metrics for the gradient-boosting model on the test split.
y_pred_bag_reg = Tree_Bst_reg.predict(X_test)
print("Classification Report:")
print(classification_report(y_test, y_pred_bag_reg))
Classification Report:
precision recall f1-score support
Children/Anime 0.71 0.55 0.62 3135
Classical/Opera 0.78 0.82 0.80 3398
Country/Folk 0.49 0.56 0.52 3086
Dance/Electronic 0.65 0.72 0.68 8477
Hip-Hop/Rap/R&B 0.59 0.60 0.60 2845
Jazz/Blues 0.54 0.46 0.50 3307
Movie/Comedy 0.83 0.76 0.80 3495
Pop/Rock 0.47 0.47 0.47 4321
World/Soundtrack 0.67 0.65 0.66 3291
accuracy 0.63 35355
macro avg 0.64 0.62 0.63 35355
weighted avg 0.64 0.63 0.63 35355
# Confusion matrix for the gradient-boosting model.
cm_gb = confusion_matrix(y_test, y_pred_bag_reg)
fig, ax = plt.subplots(figsize=(8, 8))
disp = ConfusionMatrixDisplay(confusion_matrix=cm_gb, display_labels=class_names)
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()
# Top-10 variable importances (%) for the gradient-boosting model.
feature_importance = Tree_Bst_reg.feature_importances_ * 100
rel_imp = pd.Series(feature_importance, index=numeric_features).sort_values(ascending=True)
rel_imp.tail(10).plot(kind='barh')
plt.xlabel('Variable importance')
# Fixed title: previously said "Decision Tree model" (copy-paste error) —
# this chart shows the Gradient Boosting model's importances.
plt.title('Variable importance for Gradient Boosting model')
# Numbered class labels for the tree plot legend.
# NOTE(review): this rebinds `class_names`, which earlier cells set to the
# plain label list — later confusion-matrix cells pick up the numbered form.
class_names = [
    "1: Children/Anime",
    "2: Classical/Opera",
    "3: Country/Folk",
    "4: Dance/Electronic",
    "5: Hip-Hop/Rap/R&B",
    "6: Jazz/Blues",
    "7: Movie/Comedy",
    "8: Pop/Rock",
    "9: World/Soundtrack",
]
# Visualize the first boosting stage's tree (estimators_ is 2-D: stage x class).
plt.figure(figsize=(20, 10))
plot_tree(Tree_Bst_reg.estimators_[0, 0], feature_names=numeric_features, class_names=class_names, filled=True, rounded=True, max_depth=4)
plt.title("Gradient Boosting - Tree 0")
plt.show()
Model Parameters: n_estimators=250 learning_rate=0.1 max_depth=4
Model Performance: Accuracy = 63%
Classical/Opera and Movie/Comedy which has a perfect balance between precision and recall, model is both accurate and consistent in identifying this genre.
Dance/Electronic: The interesting thing here is even though the model is exposed to a significantly higher number of Dance/Electronic tracks during training, it does not overfit by assigning this label to ambiguous tracks. Instead, it tries to balance class prediction, maintaining reasonable precision
This indicates the model is not biased towards the majority class.
Pop/Rock has both low precision (47%) and recall (47%) indicating this genre remains hard for the model to classify.
Jazz/Blues has one of the lowest recall scores (46%) many actual Jazz/Blues tracks are being misclassified.
Indicates this genre is still not well captured by the model.
Gradient Boosting achieves the best balance between precision and recall across almost all genres when compared to the decision tree and random forest.
# Integer-encode the grouped genres once, then reuse the SAME fitted encoder
# everywhere. The original cell called fit_transform separately on y_train and
# y_test, which refits the encoder on each subset and can silently assign
# inconsistent codes if a subset is missing a class; transform() guarantees a
# single shared mapping.
y_multibst = spotify_df['genre_grouped']
label_encoder = LabelEncoder()
y_encoded = label_encoder.fit_transform(y_multibst)
y_train_ds1 = label_encoder.transform(y_train)
y_test_ds1 = label_encoder.transform(y_test)
# Re-split with integer labels for XGBoost; stratify on the string labels,
# which are row-aligned with y_encoded.
X_train, X_test, y_train, y_test = train_test_split(
    df_multi, y_encoded, test_size=0.2, random_state=42, stratify=y_multi
)
import xgboost as xgb
# Multiclass XGBoost classifier for the 9 grouped genres.
# NOTE: `use_label_encoder` was removed — the UserWarning in this cell's output
# ("Parameters: { use_label_encoder } are not used") shows current XGBoost
# ignores it, so passing it only produces noise.
xgb_clf = xgb.XGBClassifier(
    objective='multi:softmax',   # predict hard class labels directly
    num_class=9,                 # one class per grouped genre
    eval_metric='mlogloss',      # multiclass log-loss during training
    n_estimators=250,
    max_depth=5,
    learning_rate=0.1,
    random_state=42              # reproducible boosting
)
# Train the model and report wall-clock training time.
import time  # stdlib; not imported at the top of the file — TODO: move up with the other imports

start_time = time.time()
xgb_clf.fit(X_train, y_train)
# BUG FIX: stop the clock immediately after fit() — the original also
# included predict() in the reported "training time".
end_time = time.time()
y_pred_xgb = xgb_clf.predict(X_test)
training_duration = end_time - start_time
print(f"Training time (XGBoost): {training_duration:.2f} seconds")
# Confusion matrix for the XGBoost predictions, with readable genre labels.
cm_xgb = confusion_matrix(y_test, y_pred_xgb)
fig, ax = plt.subplots(figsize=(8, 8))
disp = ConfusionMatrixDisplay(confusion_matrix=cm_xgb, display_labels=class_names)
disp.plot(cmap='Blues', ax=ax, xticks_rotation=90)
plt.title('Confusion Matrix')
plt.show()

# Per-class precision / recall / F1 for the XGBoost model.
print("\nClassification Report:")
print(classification_report(y_test, y_pred_xgb))
/opt/anaconda3/lib/python3.12/site-packages/xgboost/training.py:183: UserWarning: [11:28:39] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:738:
Parameters: { "use_label_encoder" } are not used.
bst.update(dtrain, iteration=i, fobj=obj)
Training time (XGBoost): 3.95 seconds
Classification Report:
precision recall f1-score support
0 0.73 0.55 0.63 3135
1 0.78 0.83 0.80 3398
2 0.49 0.57 0.52 3086
3 0.65 0.72 0.68 8477
4 0.60 0.61 0.61 2845
5 0.55 0.47 0.51 3307
6 0.84 0.76 0.80 3495
7 0.47 0.46 0.47 4321
8 0.68 0.67 0.68 3291
accuracy 0.64 35355
macro avg 0.64 0.63 0.63 35355
weighted avg 0.64 0.64 0.64 35355
# Plot the ten largest XGBoost feature importances (scaled to percentages).
feature_importance = xgb_clf.feature_importances_ * 100
rel_imp = pd.Series(feature_importance, index=numeric_features).sort_values(ascending=True)
rel_imp.tail(10).T.plot(kind='barh')
plt.xlabel('Variable importance')
# BUG FIX: these importances come from xgb_clf, not the decision tree —
# the copied-over title mislabeled the chart.
plt.title('Variable importance for XGBoost model')
Text(0.5, 1.0, 'Variable importance for Decision Tree model')
KNN¶
# Baseline KNN (no dimensionality reduction). NOTE(review): the *_pca split
# below is not used by this cell — the classifier is fit on X_train/y_train.
X_train_pca, X_test_pca, y_train_pca, y_test_pca = train_test_split(
    df_multi,
    y_multi,
    test_size=0.2,
    random_state=42,
    stratify=y_multi,
)
from sklearn.neighbors import KNeighborsClassifier

# k = 5 neighbours, plain Euclidean distance, equal vote weights.
knn = KNeighborsClassifier(n_neighbors=5, metric="euclidean", weights="uniform", p=2)
knn.fit(X_train, y_train)
y_pred_knn = knn.predict(X_test)
print(classification_report(y_test, y_pred_knn))
precision recall f1-score support
0 0.55 0.56 0.56 3135
1 0.69 0.79 0.74 3398
2 0.37 0.51 0.43 3086
3 0.60 0.69 0.64 8477
4 0.55 0.49 0.52 2845
5 0.48 0.38 0.43 3307
6 0.83 0.71 0.76 3495
7 0.41 0.31 0.36 4321
8 0.61 0.56 0.58 3291
accuracy 0.57 35355
macro avg 0.57 0.56 0.56 35355
weighted avg 0.57 0.57 0.57 35355
from sklearn.decomposition import PCA, TruncatedSVD

# Fit a truncated SVD with one fewer component than there are features, so the
# cumulative explained-variance curve spans (almost) the entire feature space.
n_svd_components = df_multi.shape[1] - 1
svd = TruncatedSVD(n_components=n_svd_components, random_state=42)
svd.fit(df_multi)
TruncatedSVD(n_components=10, random_state=42)In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.
TruncatedSVD(n_components=10, random_state=42)
# Scree-style plot: cumulative explained variance vs. component count,
# with a dashed reference line at the 95% threshold.
explained_var = np.cumsum(svd.explained_variance_ratio_)
fig, ax = plt.subplots(figsize=(8, 4))
ax.plot(explained_var, marker='o')
ax.axhline(y=0.95, color='r', linestyle='--')
ax.set_title("Cumulative Explained Variance by SVD Components")
ax.set_xlabel("Number of Components")
ax.set_ylabel("Cumulative Variance")
ax.grid(True)
fig.tight_layout()
plt.show()
# Reduce the features to 7 principal components, then retrain KNN on the
# reduced space.
pca = PCA(n_components=7, random_state=42)
X_reduced = pca.fit_transform(df_multi)
X_train_pca, X_test_pca, y_train_pca, y_test_pca = train_test_split(
    X_reduced, y_multi, test_size=0.2, random_state=42, stratify=y_multi
)
from sklearn.neighbors import KNeighborsClassifier
knn = KNeighborsClassifier(n_neighbors=5, metric="euclidean", weights="uniform", p=2)  # Euclidean distance
# BUG FIX: fit and evaluate on the PCA-transformed splits. The original fit on
# X_train/y_train (the unreduced features), which is why its "after PCA"
# classification report was byte-identical to the no-PCA baseline above.
knn.fit(X_train_pca, y_train_pca)
y_pred_knn = knn.predict(X_test_pca)
print(classification_report(y_test_pca, y_pred_knn))
precision recall f1-score support
0 0.55 0.56 0.56 3135
1 0.69 0.79 0.74 3398
2 0.37 0.51 0.43 3086
3 0.60 0.69 0.64 8477
4 0.55 0.49 0.52 2845
5 0.48 0.38 0.43 3307
6 0.83 0.71 0.76 3495
7 0.41 0.31 0.36 4321
8 0.61 0.56 0.58 3291
accuracy 0.57 35355
macro avg 0.57 0.56 0.56 35355
weighted avg 0.57 0.57 0.57 35355
After applying PCA with 7 components, the model is retrained using a KNN classifier. The accuracy dropped from 57% to 53% compared to the original KNN model (without PCA), indicating the reduced feature space did not preserve enough information for genre classification. (Note: the classification report printed above is identical to the no-PCA baseline, which suggests the KNN was actually refit on the unreduced features — the 53% figure should be re-verified after fitting on the PCA-transformed data.)